cmd: defer stating model info until necessary (#5248)
This commit changes the 'ollama run' command to defer fetching model information until it really needs it. That is, when in interactive mode. It also removes one such case where the model information is fetched in duplicate, just before calling generateInteractive and then again, first thing, in generateInteractive. This positively impacts the performance of the command: ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.168 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.220 total ; time ./before run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./before run llama3 'hi' 0.02s user 0.01s system 2% cpu 1.217 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 4% cpu 0.652 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 5% cpu 0.498 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with or would you like to chat? ./after run llama3 'hi' 0.01s user 0.01s system 3% cpu 0.479 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total ; time ./after run llama3 'hi' Hi! It's nice to meet you. Is there something I can help you with, or would you like to chat? ./after run llama3 'hi' 0.02s user 0.01s system 5% cpu 0.507 total
This commit is contained in:
parent
ccef9431c8
commit
2aa91a937b
2 changed files with 46 additions and 70 deletions
65
cmd/cmd.go
65
cmd/cmd.go
|
@ -287,38 +287,12 @@ func createBlob(cmd *cobra.Command, client *api.Client, path string) (string, er
|
|||
}
|
||||
|
||||
func RunHandler(cmd *cobra.Command, args []string) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
name := args[0]
|
||||
|
||||
// check if the model exists on the server
|
||||
show, err := client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
var statusError api.StatusError
|
||||
switch {
|
||||
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
show, err = client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case err != nil:
|
||||
return err
|
||||
}
|
||||
|
||||
interactive := true
|
||||
|
||||
opts := runOptions{
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
MultiModal: slices.Contains(show.Details.Families, "clip"),
|
||||
ParentModel: show.Details.ParentModel,
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
}
|
||||
|
||||
format, err := cmd.Flags().GetString("format")
|
||||
|
@ -362,11 +336,38 @@ func RunHandler(cmd *cobra.Command, args []string) error {
|
|||
}
|
||||
opts.WordWrap = !nowrap
|
||||
|
||||
if !interactive {
|
||||
return generate(cmd, opts)
|
||||
// Fill out the rest of the options based on information about the
|
||||
// model.
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return generateInteractive(cmd, opts)
|
||||
name := args[0]
|
||||
info, err := func() (*api.ShowResponse, error) {
|
||||
showReq := &api.ShowRequest{Name: name}
|
||||
info, err := client.Show(cmd.Context(), showReq)
|
||||
var se api.StatusError
|
||||
if errors.As(err, &se) && se.StatusCode == http.StatusNotFound {
|
||||
if err := PullHandler(cmd, []string{name}); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client.Show(cmd.Context(), &api.ShowRequest{Name: name})
|
||||
}
|
||||
return info, err
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
opts.MultiModal = slices.Contains(info.Details.Families, "clip")
|
||||
opts.ParentModel = info.Details.ParentModel
|
||||
opts.Messages = append(opts.Messages, info.Messages...)
|
||||
|
||||
if interactive {
|
||||
return generateInteractive(cmd, opts)
|
||||
}
|
||||
return generate(cmd, opts)
|
||||
}
|
||||
|
||||
func errFromUnknownKey(unknownKeyErr error) error {
|
||||
|
|
|
@ -31,65 +31,40 @@ const (
|
|||
)
|
||||
|
||||
func loadModel(cmd *cobra.Command, opts *runOptions) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p := progress.NewProgress(os.Stderr)
|
||||
defer p.StopAndClear()
|
||||
|
||||
spinner := progress.NewSpinner("")
|
||||
p.Add("", spinner)
|
||||
|
||||
showReq := api.ShowRequest{Name: opts.Model}
|
||||
showResp, err := client.Show(cmd.Context(), &showReq)
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.MultiModal = slices.Contains(showResp.Details.Families, "clip")
|
||||
opts.ParentModel = showResp.Details.ParentModel
|
||||
|
||||
if len(showResp.Messages) > 0 {
|
||||
opts.Messages = append(opts.Messages, showResp.Messages...)
|
||||
}
|
||||
|
||||
chatReq := &api.ChatRequest{
|
||||
Model: opts.Model,
|
||||
Messages: []api.Message{},
|
||||
Model: opts.Model,
|
||||
KeepAlive: opts.KeepAlive,
|
||||
}
|
||||
|
||||
if opts.KeepAlive != nil {
|
||||
chatReq.KeepAlive = opts.KeepAlive
|
||||
}
|
||||
|
||||
err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
||||
return client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
|
||||
p.StopAndClear()
|
||||
if len(opts.Messages) > 0 {
|
||||
for _, msg := range opts.Messages {
|
||||
switch msg.Role {
|
||||
case "user":
|
||||
fmt.Printf(">>> %s\n", msg.Content)
|
||||
case "assistant":
|
||||
state := &displayResponseState{}
|
||||
displayResponse(msg.Content, opts.WordWrap, state)
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
for _, msg := range opts.Messages {
|
||||
switch msg.Role {
|
||||
case "user":
|
||||
fmt.Printf(">>> %s\n", msg.Content)
|
||||
case "assistant":
|
||||
state := &displayResponseState{}
|
||||
displayResponse(msg.Content, opts.WordWrap, state)
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
|
||||
opts.Messages = make([]api.Message, 0)
|
||||
|
||||
err := loadModel(cmd, &opts)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
Loading…
Reference in a new issue